Calculate the RMS values for a given dataset and fit a gaussian on it


In [ ]:
from tkp.db.model import Image
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from scipy.stats import norm
from collections import defaultdict
import matplotlib
%matplotlib inline
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

settings


In [ ]:
host = 'localhost'
port = 5432
user = 'gijs'
password = 'gijs'
database = 'gijs'
dataset_id = 1
print_query = False
sigma_thresh = 4

Helper functions


In [ ]:
def rms_histogram(all_rms, sigma_thresh, frequency_name):
    """
    args:
        x: an array of RMS values
    """
    bin_num = 50
    # best fit of data
    (mu, sigma) = norm.fit(all_rms)

    # the histogram of the data
    n, bins, patches = plt.hist(all_rms, bin_num, facecolor='green', normed=1, alpha=0.6)

    # add a 'best fit' line
    y = mlab.normpdf( bins, mu, sigma)
    l = plt.plot(bins, y, 'r--', linewidth=2)

    thres_low = mu - sigma * sigma_thresh
    thres_high = mu + sigma * sigma_thresh

    plt.xlabel('RMS')
    plt.ylabel('Number if images in bin')
    t = r'$f={}\ \mu={:.3f},\ \sigma={:.3f},\ t={}\ t_{{\uparrow}}={:.3f}\ t_{{\downarrow}}={:.3f}$'
    plt.title(t.format(frequency_name, mu, sigma, sigma_thresh, thres_low, thres_high))

    plt.axvline(x=thres_low, linewidth=2, color='k', linestyle='--')
    plt.axvline(x=thres_high, linewidth=2, color='k', linestyle='--')
    plt.grid(True)
    plt.xlim(mu - sigma * sigma_thresh * 1.5, mu + sigma * sigma_thresh * 1.5)

    plt.show()

setup connections and get images


In [ ]:
param = ('postgresql', user, password, host, port, database)
alchemy_engine = create_engine('%s://%s:%s@%s:%s/%s' % param, echo=print_query)
Session = sessionmaker(bind=alchemy_engine)
session = Session()
images = session.query(Image).filter(Image.dataset_id == dataset_id).all()
print("number of images: %s" % len(images))

Prepare data and render plots


In [ ]:
freqs = defaultdict(list)
all_rms = []
for i in images:
    all_rms += [i.rms_qc]
    freqs[i.band.freq_central].append(i.rms_qc)
    
rms_values = [i.rms_qc for i in images]
frequency_name = 'all'
rms_histogram(all_rms, sigma_thresh, frequency_name)

for freq, values in freqs.items():
    frequency_name = str(freq)
    rms_histogram(values, sigma_thresh, frequency_name)